*******************************************************************************
*          	RISK PREDICTION MODELS ON NON-MIDNIGHT HOURS, 2010-2016 		  *
*   Last edited: 1/29/2020 													  *
*******************************************************************************

// Close any log left open by an earlier run that stopped before reaching its
// own log close; otherwise log using aborts with "log file already open".
// capture suppresses the error raised when no log is currently open.
capture log close

// Open (overwrite) the session log and load the analysis dataset.
log using "${SIDCodePath}/sidsedd_riskpredict.log", replace
use "${SIDDataPath}/riskpredict/fl_med_1016_allrecent.dta", clear

*--------------------------------------------------
* Define variables
*--------------------------------------------------
	// Signed clock position relative to midnight: values above 1200 are shifted
	// down by 2400 (so they become negative); values up to and including 1200
	// are kept unchanged.
	// NOTE(review): the 1200/2400 constants suggest edhour is coded HHMM -- confirm.
	gen edhour_2 = cond(edhour > 1200, edhour - 2400, edhour)

	// hourstilMNh = 1 when edhour_2 lies in [-100*h, 100*h - 100], else 0.
	// inrange() returns 0 when edhour_2 is missing, so missing hours are coded 0.
	forvalues h = 1/12 {
		gen hourstilMN`h' = inrange(edhour_2, -100*`h', 100*`h' - 100)
	}

	// Post-policy dummy.
	// year_dqtr is defined s.t. 2012.0 = first quarter of 2012,
	// 2012.25 = second quarter of 2012, etc.
	gen dqtr_2    = (dqtr - 1)/4
	gen year_dqtr = year + dqtr_2
	// NOTE(review): Stata ranks missing above every number, so a missing
	// year_dqtr yields post_qtr = 1 here -- confirm that is acceptable.
	gen post_qtr  = (year_dqtr >= 2013.75)

	// After-midnight dummy (edhour_2 is non-negative).
	// NOTE(review): a missing edhour_2 also satisfies >= 0 and is coded 1.
	gen after_MN = (edhour_2 >= 0)

	gen post_qtr_after_MN = post_qtr * after_MN

	// Observation dummy (since hcup_os == 3 when observation status).
	// NOTE(review): a missing hcup_os satisfies > 0 and is coded obs = 1.
	gen obs        = (hcup_os > 0)
	gen out_notobs = (hcup_os == 0 & inpatient == 0)

	// medicare50: 1 if medicare, 0 if not medicare and age <= 50 (to reduce
	// spillovers), missing for everyone else.
	gen medicare50 = cond(medicare == 1, 1, cond(age <= 50 & medicare == 0, 0, .))

	// Interact each payer dummy (medicare, trad_medicare, medicare50) with
	// after_MN, post_qtr, and their product. The generated names drop the
	// underscore from the payer name (trad_medicare -> tradmedicare).
	foreach payer in medicare trad_medicare medicare50 {
		local tag = subinstr("`payer'", "_", "", .)
		gen after_MN_`tag'          = after_MN * `payer'
		gen post_qtr_`tag'          = post_qtr * `payer'
		gen after_MN_post_qtr_`tag' = post_qtr_after_MN * `payer'
	}

	// Convert string variables to labelled numerics so they can be used as
	// factor variables.
	encode dshospid, gen(dshospid2)
	foreach stub in pointoforigin race hispanic {
		encode `stub'_x, gen(`stub'2)
	}

	// Five-year age bins, coded by the lower bound of each interval.
	// NOTE(review): the first break is 5, so ages below 5 presumably receive a
	// missing agebin and would drop out of the logit -- confirm against the
	// egen cut() documentation and the intended sample.
	egen agebin = cut(age), at(5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120, 125)

*--------------------------------------------------
* Generate outcome variables
*--------------------------------------------------
	* MODEL A: outcome = inpatient
	* MODEL B: outcome = inpatient, or an inpatient stay within the next 30 days
	* MODEL C: outcome = inpatient with LOS > 2
	* MODEL D: outcome = inpatient with LOS > 2, or such a stay within the next 30 days

	* NOTE(review): Stata ranks missing above every number, so a missing
	* next-30-day count satisfies "> 0" and codes the outcome as 1 -- confirm
	* that these counts are never missing.

gen outA = inpatient
gen outB = (inpatient == 1 | (n_inpatient_nxt30days > 0))
gen outC = los_ip_gt2
gen outD = (los_ip_gt2 != 0 | (n_ipgt2_nxt30days > 0))

*--------------------------------------------------
* Create lists of RHS variables
*--------------------------------------------------
	* Each local below holds one group of regressors. The groups are stacked
	* into the nested specifications RHS1-RHS4 in the regression section.
	*
	* Stata expands an undefined local macro to an empty string without raising
	* an error, so every name defined here must match its later reference
	* exactly.

	* DEMOGRAPHICS
		* age bin fully interacted with sex
		* race/hispanic
		* point of origin
		* median income of zip code
	local demo "agebin##female i.race2 i.hispanic2 i.pointoforigin2 i.medincstq"

	* GENERAL INFO FROM CURRENT VISIT
		* hospital
		* ed hour
		* quarter
	local currentvisit_gen "i.dshospid2 i.edhour i.dqtr"

	* DIAGNOSIS AND CHRONIC CONDITIONS FROM CURRENT VISIT
		* primary diagnosis this visit
		* chronic conditions dummies
	local currentvisit_dx "i.dxccs1 d_chron_bs*"

	* PREVIOUS VISITS
		* n of visits within 30/365 days
		* n inpatient within 30/365 days
		* n inpatient los within 30/365 days
		* n ED los within 30/365 days
	local prevvisit30_gen 	"n_visits_30days n_inpatient_30days n_los_ip_30days n_los_ED_30days*"
	local prevvisit365_gen 	"n_visits_365days n_inpatient_365days n_los_ip_365days n_los_ED_365days*"

	* PREVIOUS DIAGNOSES/PROCEDURE
		* dummies of diagnoses within 30/365 days
		* dummies of procedures within 30/365 days
	* FIX: these four locals were previously spelled with three v's
	* (prevvvisit...), while RHS3 and RHS4 reference the two-v spelling. The
	* references therefore expanded to nothing and the diagnosis/procedure
	* history dummies were silently left out of Models 3 and 4.
	* NOTE(review): with the names corrected, the wildcards must match existing
	* variables or logit will stop with an error -- confirm the d_dxccs_* and
	* d_prccs_* dummies are present in the input dataset.
	local prevvisit30_dx	"d_dxccs_*_30days"
	local prevvisit365_dx  	"d_dxccs_*_365days"
	local prevvisit30_pr	"d_prccs_*_30days"
	local prevvisit365_pr  	"d_prccs_*_365days"



*--------------------------------------------------
* Run regressions
	* the prediction in the main text is Model "B4"
*--------------------------------------------------
// Nested covariate sets; each specification extends the previous one.
// Stata expands an undefined local to an empty string without error, so every
// macro named here must match its definition exactly.

// MODEL 1: demographics only
local RHS1 "`demo'"

// MODEL 2: MODEL 1 + current visit information
local RHS2 "`RHS1' `currentvisit_gen' `currentvisit_dx'"

// MODEL 3: MODEL 2 + history over the previous 30 days
local RHS3 "`RHS2' `prevvisit30_gen' `prevvisit30_dx' `prevvisit30_pr'"

// MODEL 4: MODEL 3 + history over the previous 365 days
local RHS4 "`RHS3' `prevvisit365_gen' `prevvisit365_dx' `prevvisit365_pr'"

// Outcome classes (A-D) and specification numbers (1-4) to estimate.
// Currently only B4 is run.
local modelclasses "B"
foreach cls of local modelclasses {
	foreach spec of numlist 4 {
		// Skip any model whose prediction variable is already in the data.
		capture confirm variable pred_`cls'`spec'
		if _rc != 0 {
			preserve
				// Estimate only on records with hourstilMN9 == 0, i.e. *outside*
				// the window within 9 hours of midnight on each side
				// (per the original author: cases between 9AM-3PM).
				drop if hourstilMN9 != 0
				display "Now running: logit out`cls' RHS`spec'"
				quietly logit out`cls' `RHS`spec''
				estimates save "${SIDDataPath}/riskpredict/logit_`cls'`spec'_9h.ster", replace
			restore
			// Back on the full dataset: score every record with the estimates
			// still in memory. The xb option stores the linear index rather
			// than the predicted probability.
			predict pred_`cls'`spec', xb
		}
	}
}

// Reduce the dataset to the record key plus the prediction variable(s).
keep key pred*

// For every prediction variable, add its quintile and decile group.
foreach score of varlist pred* {
	xtile quint_`score' = `score', nquantiles(5)
	xtile dec_`score'   = `score', nquantiles(10)
}

save "${SIDDataPath}/riskpredict/fl_med_1016_all_pred.dta", replace

log close
